
clear all
set more off
capture log close

*Specify directories here
global dta "C:\Users\zhenshanchen\Documents\RP from VC\Valuing public good WTP\dta"
global results "C:\Users\zhenshanchen\Documents\RP from VC\Valuing public good WTP\Results"

*Simulate errors in elicited probabilities

********************************************************************
*  Probability as a variable with errors - Scenario II with errors *
********************************************************************
*--------Scenario 2-model subjective probability as a variable with errors
*It's the same P_D1 and P_D0, just that they are known to researchers in the estimation
clear all
set more off

foreach n in 1 2 3 4 5 6 7 8 9 10{
gen varname`n'=.
}
drop if _n>=1
save "$dta/MonteCarlo_Results_error1.dta",replace
save "$dta/MonteCarlo_Results_error2.dta",replace
save "$dta/MonteCarlo_Results_error3.dta",replace

global P_D1 "P_D1"
global P_D0 "P_D0"
 program define dh_dc_2
    version 14
    args lnf beta1 beta2 beta6 beta3
    tempvar d p non_l z p0 p1
    quietly gen double `d'=($ML_y1==1)
    quietly gen double `p'=normprob(`beta1')
	quietly gen double `non_l'=(`beta3'-$P_D1*`beta6')/($P_D1-$P_D0)
	quietly gen double `z'=normprob((`beta2'+`non_l'))  /*probit*/
    *quietly gen double `z'=exp(`beta2'+`non_l')/(1+exp(`beta2'+`non_l'))  /*logit*/
    quietly gen double `p0'=1-(`p'*`z')
    quietly gen double `p1'=(`p'*`z')
    quietly replace `lnf'=(1-`d')*ln(`p0')+`d'*ln(`p1')
 end 
*only investigate with 5000 observations 

*Random measurement error: delta1 (random proportional increase or decrease of the original probability)
clear
set seed 12345678

forval i = 1/500{
clear
local N=5000
set obs `N'
di "i " `i'
**********
* Setup  *
**********
*Generate error terms
drawnorm e
gen ee = -log(-log(runiform()))
drawnorm ee1
*gen ee1 = invnorm(runiform())
gen ee2 = invnorm(runiform())

*Generate participation variables
gen Age=runiform(18,70)
gen Inc=exp(rnormal(4,1))

*Generate participation parameters
scalar gama_Age=.003
scalar gama_Inc=.0014
scalar gama_C=-1

*Participation decision and prob 
gen Participate=gama_C+gama_Age*Age+gama_Inc*Inc+e

*Generate donation variables
gen A=exp(rnormal(0,2))
*Generate utility parameters
scalar alfa_A=.4
scalar alfa_Age=.05
scalar alfa_Inc=.027
scalar beta=.1

*------- Senario 2
*Generate subjective probability variables
gen epsilon=rnormal(0,.1)
gen D=runiform(1000,5000)
scalar D_threshold=D
drop D
di D_threshold
gen D_bar=D_threshold
gen c_solicAmount=floor(runiform(5,120))
*Generate probability parameters
scalar miu=0
scalar tao=.02
scalar ro=-.1/`N'

*Generate probability as observed with errors
gen delta1=rnormal(0,1)
egen delta1_max=max(abs(delta1))
gen d_p=delta1/delta1_max

gen P_D1=normprob(miu+tao*c_solicAmount+ro*D_threshold)
gen P_D0=normprob(miu+ro*D_threshold)

*Notice: though they are generated from functions,  the probabilities will serve as observed variables in the estimation.

*Now simulate the decisions 
*Generate participation decision
gen Part=(Participate>0)
gen WG=1
scalar wg=WG
*Generate expected utility comparison
gen Delta_EU = 1+(P_D1-P_D0)*(alfa_A*A+alfa_Age*Age+alfa_Inc*Inc+ee1)-beta*P_D1*c_solicAmount
gen Di=(Part==1&Delta_EU>0)
tab Di

************
* Estimate *
************
*Now observed probabilities include errors
replace P_D1=normprob(miu+tao*c_solicAmount+ro*D_threshold)*(1+d_p)
replace P_D0=normprob(miu+ro*D_threshold)*(1+d_p)

 *  B C Education
 global varlist_ydc "A Age Inc "
 * Education
 global varlist_d "Age Inc "

 global price "c_solicAmount"
 sum $varlist_d

 probit Part $varlist_d
 mat list e(b)
 mat bparticipation=e(b)

 mat b=(bparticipation,alfa_A,alfa_Age,alfa_Inc,beta,wg)
 *D_bar
 ml model lf dh_dc_2 (hurdle: Di=$varlist_d)(above: Di=$varlist_ydc,nocons)(beta:$price,nocons)(warm_glow:)
 ml init b,copy
 
 cap {
 ml maximize,iterate(500) dif
 estat ic

 mat list e(b)
 di _b[above: A]
 di _b[above: Age]
 di _b[above: Inc]

 mat V=e(V)
 di V[1,1]
 gen SEsq_A=V[4,4]
 gen SEsq_Age=V[5,5]
 gen SEsq_Inc=V[6,6]
 gen est_A=(V[4,4]!=0)
 gen est_Age=(V[5,5]!=0)
 gen est_Inc=(V[6,6]!=0)
 
 gen WTP_est=(_b[above: A]*A*est_A+_b[above: Age]*Age*est_Age+_b[above: Inc]*Inc*est_Inc)/_b[beta:c_solicAmount]
 sum WTP_est
 scalar WTP_e_scalar=r(mean)
 scalar WTP_sd_scalar=r(sd)
 scalar WTP_95Ci_lb=r(mean)-r(sd)*1.96
 scalar WTP_95Ci_ub=r(mean)+r(sd)*1.96
 scalar WTP_90Ci_lb=r(mean)-r(sd)*1.645
 scalar WTP_90Ci_ub=r(mean)+r(sd)*1.645
 
 gen WTP=(.4*7.39+.05*44+.027*90)/.1

 sum WTP
 scalar WTP_o=r(mean)
 scalar delta_WTP = WTP_e_scalar-WTP_o
 scalar total_benefit=e(N) * WTP_o
 scalar benefit_cost_ratio=total_benefit/D_threshold
 
 di WTP_95Ci_lb
 di WTP_95Ci_ub
 gen CI95_cover=1 if WTP_95Ci_ub>WTP_o & WTP_o>WTP_95Ci_lb
 scalar CI95cover=CI95_cover
 gen CI90_cover=1 if WTP_90Ci_ub>WTP_o & WTP_o>WTP_90Ci_lb
 scalar CI90cover=CI90_cover
 count if Di==1
 scalar N_donor=r(N)
 
 mat R=(`i',delta_WTP, WTP_e_scalar, WTP_o, CI90cover, CI95cover, e(N), total_benefit, D_threshold, benefit_cost_ratio,_b[beta:c_solicAmount],_b[above: A],_b[above: Age],_b[above: Inc],N_donor)
 mat list R
 
 clear 
 tempfile R_temp
 svmat R, names(varname)
 save "$dta/temp_result.dta",replace
 
 use "$dta/MonteCarlo_Results_error1.dta",clear
 append using "$dta/temp_result.dta"
 save "$dta/MonteCarlo_Results_error1.dta",replace
 }
 }

 
 
 
 
 
 
*Endogeneity: delta2
clear
set seed 12345678

forval i = 1/500{
clear
local N=5000
set obs `N'
di "i " `i'
**********
* Setup  *
**********
*Generate error terms
drawnorm e
gen ee = -log(-log(runiform()))
drawnorm ee1
gen ee2 = invnorm(runiform())

*Generate participation variables
gen Age=runiform(18,70)
gen Inc=exp(rnormal(4,1))
*Generate participation parameters
scalar gama_Age=.003
scalar gama_Inc=.0014
scalar gama_C=-1

*Participation decision and prob 
gen Participate=gama_C+gama_Age*Age+gama_Inc*Inc+e

*Generate donation variables
gen A=exp(rnormal(0,2))

*Generate utility parameters
scalar alfa_A=.4
scalar alfa_Age=.05
scalar alfa_Inc=.027
scalar beta=.1

*------- Senario 2
*Generate subjective probability variables
gen epsilon=rnormal(0,.1)
gen D=runiform(1000,5000)
scalar D_threshold=D
drop D
di D_threshold
gen D_bar=D_threshold
gen c_solicAmount=floor(runiform(5,120))
*Generate probability parameters
scalar miu=0
scalar tao=.02
scalar ro=-.1/`N'

*Generate probability as observed with errors
gen delta2=.5*A
egen delta2_max=max(abs(delta2))
gen d_p=delta2/delta2_max
gen P_D1=normprob(miu+tao*c_solicAmount+ro*D_threshold)
gen P_D0=normprob(miu+ro*D_threshold)

*Notice: though they are generated from functions, the probabilities will serve as observed variables in the estimation.

*Now simulate the decisions 
*Generate participation decision
gen Part=(Participate>0)
gen WG=1
scalar wg=WG
*Generate expected utility comparison
gen Delta_EU = 1+(P_D1-P_D0)*(alfa_A*A+alfa_Age*Age+alfa_Inc*Inc+ee1)-beta*P_D1*c_solicAmount
gen Di=(Part==1&Delta_EU>0)
tab Di

************
* Estimate *
************
*Now observed probabilities include errors
replace P_D1=normprob(miu+tao*c_solicAmount+ro*D_threshold)*(1+d_p)
replace P_D0=normprob(miu+ro*D_threshold)*(1+d_p)

 *  B C Education
 global varlist_ydc "A Age Inc "
 * Education
 global varlist_d "Age Inc "

 global price "c_solicAmount"
 sum $varlist_d

 probit Part $varlist_d
 mat list e(b)
 mat bparticipation=e(b)

 mat b=(bparticipation,alfa_A,alfa_Age,alfa_Inc,beta,wg)
 *D_bar
 ml model lf dh_dc_2 (hurdle: Di=$varlist_d)(above: Di=$varlist_ydc,nocons)(beta:$price,nocons)(warm_glow:)
 ml init b,copy
 
 cap {
 ml maximize,iterate(500) dif
 estat ic

 mat list e(b)
 di _b[above: A]
 di _b[above: Age]
 di _b[above: Inc]

 mat V=e(V)
 di V[1,1]
 gen SEsq_A=V[4,4]
 gen SEsq_Age=V[5,5]
 gen SEsq_Inc=V[6,6]
 gen est_A=(V[4,4]!=0)
 gen est_Age=(V[5,5]!=0)
 gen est_Inc=(V[6,6]!=0)
 
 gen WTP_est=(_b[above: A]*A*est_A+_b[above: Age]*Age*est_Age+_b[above: Inc]*Inc*est_Inc)/_b[beta:c_solicAmount]
 sum WTP_est
 scalar WTP_e_scalar=r(mean)
 scalar WTP_sd_scalar=r(sd)
 scalar WTP_95Ci_lb=r(mean)-r(sd)*1.96
 scalar WTP_95Ci_ub=r(mean)+r(sd)*1.96
 scalar WTP_90Ci_lb=r(mean)-r(sd)*1.645
 scalar WTP_90Ci_ub=r(mean)+r(sd)*1.645
 
 *gen WTP=(.4*A+.05*Age+.027*Inc)/.1
  gen WTP=(.4*7.39+.05*44+.027*90)/.1

 sum WTP
 scalar WTP_o=r(mean)
 scalar delta_WTP = WTP_e_scalar-WTP_o
 scalar total_benefit=e(N) * WTP_o
 scalar benefit_cost_ratio=total_benefit/D_threshold
 
 di WTP_95Ci_lb
 di WTP_95Ci_ub
 gen CI95_cover=1 if WTP_95Ci_ub>WTP_o & WTP_o>WTP_95Ci_lb
 scalar CI95cover=CI95_cover
 gen CI90_cover=1 if WTP_90Ci_ub>WTP_o & WTP_o>WTP_90Ci_lb
 scalar CI90cover=CI90_cover
 count if Di==1
 scalar N_donor=r(N)
 
 mat R=(`i',delta_WTP, WTP_e_scalar, WTP_o, CI90cover, CI95cover, e(N), total_benefit, D_threshold, benefit_cost_ratio,_b[beta:c_solicAmount],_b[above: A],_b[above: Age],_b[above: Inc],N_donor)
 mat list R
 
 clear 
 tempfile R_temp
 svmat R, names(varname)
 save "$dta/temp_result.dta",replace
 
 use "$dta/MonteCarlo_Results_error2.dta",clear
 append using "$dta/temp_result.dta"
 save "$dta/MonteCarlo_Results_error2.dta",replace
 }
 }
 
 *Random measurement error + Endogeneity: delta3
clear
set seed 12345678

forval i = 1/500{
clear
local N=5000
set obs `N'
di "i " `i'
**********
* Setup  *
**********
*Generate error terms
drawnorm e
gen ee = -log(-log(runiform()))
drawnorm ee1
gen ee2 = invnorm(runiform())

*Generate participation variables
gen Age=runiform(18,70)
gen Inc=exp(rnormal(4,1))
*Generate participation parameters
scalar gama_Age=.003
scalar gama_Inc=.0014
scalar gama_C=-1

*Participation decision and prob 
gen Participate=gama_C+gama_Age*Age+gama_Inc*Inc+e

*Generate donation variables
gen A=exp(rnormal(0,2))

*Generate utility parameters
scalar alfa_A=.4
scalar alfa_Age=.05
scalar alfa_Inc=.027
scalar beta=.1

*------- Senario 2
*Generate subjective probability variables
gen epsilon=rnormal(0,.1)
gen D=runiform(1000,5000)
scalar D_threshold=D
drop D
di D_threshold
gen D_bar=D_threshold
gen c_solicAmount=floor(runiform(5,120))
*Generate probability parameters
scalar miu=0
scalar tao=.02
scalar ro=-.1/`N'

*Generate probability as observed with errors
gen delta3=rnormal(0,1)+.5*A
egen delta3_max=max(abs(delta3))
gen d_p=delta3/delta3_max
gen P_D1=normprob(miu+tao*c_solicAmount+ro*D_threshold)
gen P_D0=normprob(miu+ro*D_threshold)

*Notice: though they are generated from functions, the probabilities will serve as observed variables in the estimation.

*Now simulate the decisions 
*Generate participation decision
gen Part=(Participate>0)
gen WG=1
scalar wg=WG
*Generate expected utility comparison
gen Delta_EU = 1+(P_D1-P_D0)*(alfa_A*A+alfa_Age*Age+alfa_Inc*Inc+ee1)-beta*P_D1*c_solicAmount
gen Di=(Part==1&Delta_EU>0)
tab Di

************
* Estimate *
************
*Now observed probabilities include errors
replace P_D1=normprob(miu+tao*c_solicAmount+ro*D_threshold)*(1+d_p)
replace P_D0=normprob(miu+ro*D_threshold)*(1+d_p)

 *  B C Education
 global varlist_ydc "A Age Inc "
 * Education
 global varlist_d "Age Inc "

 global price "c_solicAmount"
 sum $varlist_d

 probit Part $varlist_d
 mat list e(b)
 mat bparticipation=e(b)

 mat b=(bparticipation,alfa_A,alfa_Age,alfa_Inc,beta,wg)
 *D_bar
 ml model lf dh_dc_2 (hurdle: Di=$varlist_d)(above: Di=$varlist_ydc,nocons)(beta:$price,nocons)(warm_glow:)
 ml init b,copy
 
 cap {
 ml maximize,iterate(500) dif
 estat ic

 mat list e(b)
 di _b[above: A]
 di _b[above: Age]
 di _b[above: Inc]

 mat V=e(V)
 di V[1,1]
 gen SEsq_A=V[4,4]
 gen SEsq_Age=V[5,5]
 gen SEsq_Inc=V[6,6]
 gen est_A=(V[4,4]!=0)
 gen est_Age=(V[5,5]!=0)
 gen est_Inc=(V[6,6]!=0)
 
 gen WTP_est=(_b[above: A]*A*est_A+_b[above: Age]*Age*est_Age+_b[above: Inc]*Inc*est_Inc)/_b[beta:c_solicAmount]
 sum WTP_est
 scalar WTP_e_scalar=r(mean)
 scalar WTP_sd_scalar=r(sd)
 scalar WTP_95Ci_lb=r(mean)-r(sd)*1.96
 scalar WTP_95Ci_ub=r(mean)+r(sd)*1.96
 scalar WTP_90Ci_lb=r(mean)-r(sd)*1.645
 scalar WTP_90Ci_ub=r(mean)+r(sd)*1.645
 
  gen WTP=(.4*7.39+.05*44+.027*90)/.1

 sum WTP
 scalar WTP_o=r(mean)
 scalar delta_WTP = WTP_e_scalar-WTP_o
 scalar total_benefit=e(N) * WTP_o
 scalar benefit_cost_ratio=total_benefit/D_threshold
 
 di WTP_95Ci_lb
 di WTP_95Ci_ub
 gen CI95_cover=1 if WTP_95Ci_ub>WTP_o & WTP_o>WTP_95Ci_lb
 scalar CI95cover=CI95_cover
 gen CI90_cover=1 if WTP_90Ci_ub>WTP_o & WTP_o>WTP_90Ci_lb
 scalar CI90cover=CI90_cover
 count if Di==1
 scalar N_donor=r(N)
 
 mat R=(`i',delta_WTP, WTP_e_scalar, WTP_o, CI90cover, CI95cover, e(N), total_benefit, D_threshold, benefit_cost_ratio,_b[beta:c_solicAmount],_b[above: A],_b[above: Age],_b[above: Inc],N_donor)
 mat list R
 
 clear 
 tempfile R_temp
 svmat R, names(varname)
 save "$dta/temp_result.dta",replace
 
 use "$dta/MonteCarlo_Results_error3.dta",clear
 append using "$dta/temp_result.dta"
 save "$dta/MonteCarlo_Results_error3.dta",replace
 }
 }
 
 
************Compute Error Measurements***************
*Scenario 2-model subjective probability as a variable
* 50000 80000 
foreach n in 1 2 3{

foreach N in 5000{ 
clear all
use "$dta/MonteCarlo_Results_error`n'.dta",clear
quietly {
ren varname1 ID
ren varname2 delta_WTP
ren varname3 WTP_est
ren varname4 WTP_obs
ren varname5 CI90cover
ren varname6 CI95cover
ren varname7 N
ren varname8 total_benefit
ren varname9 total_cost
ren varname10 benefit_cost_ratio
ren varname11 beta
ren varname12 alfa_A
ren varname13 alfa_age
ren varname14 alfa_inc
ren varname15 Ndonors

keep if N==`N'
count if delta_WTP>1000|delta_WTP<-1000
*12 outliners not count
drop if delta_WTP>1000|delta_WTP<-1000

gen order=_n
egen success_N=max(order)
egen total_N=max(ID)
scalar Success_rate=success_N/total_N
di Success_rate

replace delta_WTP=WTP_est-WTP_obs
sum delta_WTP
scalar Mean_bias=r(mean)
di Mean_bias

sum delta_WTP,d
scalar Median_bias=r(p50)
di Median_bias 

gen sq_delta_WTP=delta_WTP^2
sum sq_delta_WTP 
scalar RMSE=sqrt(r(mean))
di RMSE

gen abs_delta_WTP=abs(delta_WTP)
sum abs_delta_WTP,d
scalar MAE=sqrt(r(p50))
di MAE

sum WTP_est
scalar MEAN_est=r(mean)
scalar SD_est=r(sd)
di MEAN_est " " SD_est

sum WTP_obs
scalar MEAN_WTP=r(mean)
di MEAN_WTP

egen CI90_covers=sum(CI90cover)
scalar CI90_coverrate=CI90_covers/success_N
di CI90_coverrate

egen CI95_covers=sum(CI95cover)
scalar CI95_coverrate=CI95_covers/success_N
di CI95_coverrate

sum Ndonors
scalar MEAN_donors=r(mean)
scalar SD_donors=r(sd)
di MEAN_donors
di SD_donors

sum beta
scalar MEAN_beta=r(mean)
scalar SD_beta=r(sd)

sum alfa_A
scalar MEAN_alfa_A=r(mean)
scalar SD_alfa_A=r(sd)

sum alfa_age
scalar MEAN_alfa_age=r(mean)
scalar SD_alfa_age=r(sd)

sum alfa_inc
scalar MEAN_alfa_inc=r(mean)
scalar SD_alfa_inc=r(sd)
}
di "N=" `N'
di "Success_rate " Success_rate " MEAN_est " MEAN_est " SD_est " SD_est " MEAN_WTP " MEAN_WTP   

di "Mean_bias " Mean_bias " Median_bias " Median_bias " RMSE " RMSE " MAE " MAE " CI90_coverrate " CI90_coverrate " CI95_coverrate " CI95_coverrate

mat R = (`N',Success_rate, MEAN_est, SD_est, MEAN_WTP, Mean_bias, Median_bias, RMSE, MAE, CI90_coverrate, CI95_coverrate, MEAN_donors, SD_donors, MEAN_beta, SD_beta, MEAN_alfa_A, SD_alfa_A, MEAN_alfa_age, SD_alfa_age, MEAN_alfa_inc, SD_alfa_inc)

mat coln R = Samplesize Success_rate MEAN_est SD_est MEAN_WTP Mean_bias Median_bias RMSE MAE CI90_coverrate CI95_coverrate MEAN_donors SD_donors MEAN_beta SD_beta MEAN_alfa_A SD_alfa_A MEAN_alfa_age SD_alfa_age MEAN_alfa_inc SD_alfa_inc
mat list R

clear
svmat R, names(col)

export delimited using "$results\Simu_results1_witherrors`n'.csv",replace
}
}